%real data
data = xlsread('s&p100_price.csv');
ss=size(data); N=ss(1,2)/2; tau=ss(1,1);

sp=data(:,2:2:end); %adj close (for close -> 1:2:end)
ret=log(sp); %returns
%ret=log(sp(:,1:25)); N=25; %if only 25 stocks
r=1; %number of largest eigenvalues used in LR
k=3; %order of VAR
T=tau-1; %ret_1 is used as X0

Lc=tril(ones(T,T),-1)-tril(ones(T,T),-2)+triu(ones(T,T),T-1); %cyclic lag operator

h=zeros(N,3); %3=number of lags considered for ADF
for i=1:N;
    h(i,:) = adftest(ret(:,i),'model','TS','lags',0:2,'alpha',0.05); 
    %alpha = size
    %h = 1 indicates rejection of the unit-root null in favor of the alternative model
    if h(i,1)==1;
        i
        [rej,pValue]= adftest(ret(:,i),'model','TS','lags',0:2,'alpha',0.05)
    end
end
sum(h)

X_tilde=zeros(N,T); dX=zeros(N,T);
for t=1:T;
    X_tilde(:,t)=ret(t,:)'-((t-1)/T)*(ret(tau,:)-ret(1,:))';
    dX(:,t)=(ret(t+1,:)-ret(t,:))';
end

Z0=dX; Zk=X_tilde*(Lc^(k-1))';
Z1=ones(N*(k-1)+1,T); %vector of regressors (dX_{t-1},...,dX_{t-k+1},1) 
for j=1:(k-1);
    Z1((1+N*(j-1)):N*j,:)=dX*(Lc^j)';    
end
M11=Z1*transpose(Z1)/T;
R0=Z0-(Z0*transpose(Z1)/T)*inv(M11)*Z1;
Rk=Zk-(Zk*transpose(Z1)/T)*inv(M11)*Z1;    
S00=R0*(R0)'; Skk=Rk*(Rk)'; S0k=R0*(Rk)'; Sk0=Rk*(R0)';
lambda=eig(inv(Skk)*Sk0*inv(S00)*S0k);
lambda=sort(lambda,'descend');
eig_max(1,i)=lambda(1,1);

q_alpha=0.9792; %95% quantile for rejection

p=2; q=T/N-k;
lambda_p=((sqrt(p*(p+q-1))+sqrt(q))^2)/(p+q)^2;
lambda_m=((sqrt(p*(p+q-1))-sqrt(q))^2)/(p+q)^2;
c1=log(1-lambda_p);
c2=-2^(2/3)*(lambda_p)^(2/3)/((1-lambda_p)^(1/3)*(lambda_p-lambda_m)^(1/3))*(p+q)^(-2/3);

loglambda=log(1-lambda);
%LR=(log(1-lambda_sort(1,1))-c1)/c2*N^(2/3)
LR=(sum(loglambda(1:r,1))-r*c1)/c2*N^(2/3)

%all eigenvalues
nb=40; % #of bins
figure(1)
histogram(lambda,nb,'EdgeColor','none');

%Wachter with lambda_p, lambda_m from our paper
figure(2)
%histogram(lambda,nb/2,'EdgeColor','none','Normalization','pdf'); %no edges
histogram(lambda,nb/2,'Normalization','pdf');
hold on;
fplot(@(x) (p+q)*sqrt(max(0,(lambda_p-x)*(x-lambda_m)))/x/(1-x)/2/pi, 'LineWidth', 3);
set(gca, 'XLim', [0 1])
